\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\definecolor{ugreen}{rgb}{0,0.5,0}


%%% outline
%-------------------------------------------------------------------------

\begin{tikzpicture}

{\scriptsize
\node [anchor=north west] (entry1) at (0,0) {\textbf{1:} 这 / 是 / 数据};
\node [anchor=north west] (entry2) at ([yshift=0.1em]entry1.south west) {\textbf{2:} 现在 / 已经 / 实现};
\node [anchor=north west] (entry3) at ([yshift=0.1em]entry2.south west) {\textbf{3:} 确实 / 有 / 很 / 多};
\node [anchor=north west] (entry4) at ([yshift=0.1em]entry3.south west) {...};
\node [anchor=south west] (corpuslabel) at (entry1.north west) {{\color{ublue} \textbf{学习用数据}}};

\begin{pgfonlayer}{background}
\node[rectangle,draw=ublue,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (entry1) (entry2) (entry3) (entry4) (corpuslabel)] (corpus) {};
\end{pgfonlayer}
}

\node [anchor=west,ugreen] (P) at ([xshift=5.2em,yshift=-0.8em]corpus.east){\large{P($\cdot$)}};
\node [anchor=south] (modellabel) at (P.north) {{\color{ublue} {\scriptsize \textbf{统计模型}}}};

\begin{pgfonlayer}{background}
\node[rectangle,draw=ublue,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (P) (modellabel)] (model) {};
\end{pgfonlayer}

\draw [->,very thick,ublue] ([xshift=0.2em]corpus.east) -- ([xshift=4.2em]corpus.east)  node [pos=0.5, above] {\color{red}{\scriptsize{统计学习}}};

\draw [->,very thick,ublue] ([xshift=0.2em]model.east) -- ([xshift=4.2em]model.east)  node [pos=0.5, above] {\color{red}{\scriptsize{搜索\&计算}}};

{\scriptsize
\node [anchor=north west] (sentlabel) at ([xshift=6.2em,yshift=-1em]model.north east) {\color{red}{自动分词系统}};
\node [anchor=north west] (sent) at (sentlabel.south west) {\textbf{对任意句子进行分词}};
}

\begin{pgfonlayer}{background}
\node[rectangle,draw=ublue,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (sentlabel) (sent)] (segsystem) {};
\end{pgfonlayer}

{\footnotesize
{
\node [anchor=west] (label1) at (0,6em) {实际上，通过学习我们得到了一个分词模型P($\cdot$)，给定任意的分词结果};
\node [anchor=north west] (label1part2) at ([yshift=0.5em]label1.south west) {$W=w_1 w_2...w_n$，都能通过P($W$)=$\textrm{P}(w_1) \cdot \textrm{P}(w_2) \cdot ... \cdot \textrm{P}(w_n)$ 计算这种分词的\hspace{0.13em} };
\node [anchor=north west] (label1part3) at ([yshift=0.5em]label1part2.south west) {概率值};
}

\begin{pgfonlayer}{background}
{
\node[rectangle,fill=blue!10,thick,dotted,inner sep=0.2em] [fit = (label1) (label1part2) (label1part3)] (label1content) {};
}
\end{pgfonlayer}

{
\draw [-,thick,red,dotted] ([yshift=0.3em]modellabel.north) ..controls +(north:0.5) and +(south:0.5).. ([xshift=-3em]label1content.south);
}

}

{\footnotesize
{
\node [anchor=west] (label1) at (0,-6.8em) {\textbf{自动分词系统}：对任意的数据句子$S$，找到最佳的分词结果$W^{*}$输出};
}
{
\node [anchor=north west] (label2) at (label1.south west) {假设输入$S$=“确实现在数据很多”};
}
{
\node [anchor=north west,draw,thick,inner sep=2pt] (data11) at (label2.south west) {枚举所有可能的切分};
}
{
\node [anchor=west,draw,thick,inner sep=2pt] (data12) at ([xshift=4em]data11.east) {计算每种切分的概率};
}
{
\node [anchor=west,draw,thick,inner sep=2pt] (data13) at ([xshift=4.0em]data12.east) {选择最佳结果};
}
{
\draw [->,thick] ([xshift=0.1em]data11.east) -- ([xshift=-0.1em]data12.west);
}
{
\draw [->,thick] ([xshift=0.1em]data12.east) -- ([xshift=-0.1em]data13.west);
}

{\scriptsize
{
\node [anchor=north west] (data21) at (data11.south west) {确/实现/在/数/据很/多};
}
{
\node [anchor=north west] (data22) at (data12.south west) {$\textrm{P}(\textrm{“确”}) \cdot \textrm{P}(\textrm{“实现”}) \cdot \textrm{P}(\textrm{“在”}) \cdot \textrm{P}(\textrm{“数”}) \cdot $};
}
\node [anchor=north west,minimum height=1.6em] (data23) at (data13.south west) {};
\node [anchor=north west,minimum height=1.6em] (data31) at ([yshift=0.3em]data21.south west) {};
{
\node [anchor=north west] (data32) at ([yshift=0.3em]data22.south west) {$\textrm{P}(\textrm{“据很”}) \cdot  \textrm{P}(\textrm{“多”}) = 2.13 \times 10^{-45}$};
}
\node [anchor=north west,minimum height=1.6em] (data33) at ([yshift=0.3em]data23.south west) {};

{
\node [anchor=north west] (data41) at (data31.south west) {确实/现在/数据/很多};
}
{
\node [anchor=north west] (data42) at (data32.south west) {$\textrm{P}(\textrm{“确实”}) \cdot \textrm{P}(\textrm{“现在”}) \cdot \textrm{P}(\textrm{“数据”}) \cdot $};
}
{
\node [anchor=north west] (data43) at ([yshift=-0.2em,xshift=3em]data33.south west) {\color{red}{\textbf{输出}}};
\draw [->,red,thick] (data43.west)--([xshift=-1em]data43.west);
}
{
\node [anchor=north west] (data51) at (data41.south west) {...};
}
{
\node [anchor=north west] (data52) at ([yshift=0.3em]data42.south west) {$\textrm{P}(\textrm{“很”}) \cdot  \textrm{P}(\textrm{“多”}) = 1.54 \times 10^{-25}$};
}
\node [anchor=north west] (data53) at ([yshift=0.3em]data43.south west) {};
}
}

\begin{pgfonlayer}{background}
{
\node[rectangle,fill=blue!10,thick,dotted,inner sep=0.1em] [fit = (label1) (data11) (data13) (data51) (data52) (data53)] (segcontent) {};
}
\end{pgfonlayer}

{
\draw [-,thick,red,dotted] (segcontent.north) ..controls +(north:0.7) and +(south:0.7).. (segsystem.south);
}

\end{tikzpicture}




